# coding:utf8

'''
National Enterprise Credit Information Publicity System (Yunnan)
Maintainer: Xiao Di
'''
import pycurl
import urllib
import re
from utils import kill_captcha
import StringIO
import random
from bs4 import BeautifulSoup
import json
import table
import requests
import base64

# Luminati proxy credentials; only referenced by the commented-out proxy setup below.
auth = 'lum-customer-socialcredits-zone-gen:a98d2b7b4b0e'
from scpy.logger import get_logger

# Module-level logger keyed on this file's path.
logger = get_logger(__file__)


def curl(url, data=''):  # fetch helper: POST when a payload is given, GET otherwise
    """Fetch *url* and return the raw response body (60s timeout)."""
    if not data:
        return requests.get(url, timeout=60).content
    return requests.post(url, data=data, timeout=60).content


# proxies = {'http': 'http://'+auth+'@zproxy.luminati.io:22225'}
# headers = {'Proxy-Authorization': 'Basic '+base64.b64encode(auth.encode('utf-8')).decode('utf-8')}
def verify(name):
    """Search the Yunnan gsxt site for company *name*; return the detail-page HTML.

    Captcha solving is currently stubbed out (the OCR call is commented and the
    answer is hard-coded to 1). On success the matched detail-page URL is stored
    in the module-level global ``detail_url`` for later use by run(). Returns
    '' when the search yields no detail link; raises ValueError after 20
    consecutive failures.
    """
    global proxies, headers, detail_url  # NOTE(review): proxies/headers are declared global but never assigned here
    loop_num = 0
    s = requests.Session()
    while 1:
        loop_num += 1
        try:

            # Load the search page first: it embeds the session token needed for the POST.
            code = s.get('http://gsxt.ynaic.gov.cn/notice/search/ent_info_list').content
            # NOTE(review): %d truncates random.random() (a float < 1) to 0, so
            # ra is always 0 -- confirm whether the server accepts that.
            verify_url = "http://gsxt.ynaic.gov.cn/notice/captcha?preset=&ra=%d" % random.random()

            verify_image = s.get(verify_url).content
            # verify = kill_captcha(verify_image,'yn','jpeg')
            # print verify
            verify = 1  # captcha answer stubbed; the kill_captcha OCR call above is disabled
            # Extract the session token from the page source (spaces stripped first).
            code = re.findall('code:"(.+?)"', code.replace(' ', ''))[0]
            data = {"searchType": 1, "captcha": verify, "session.token": code, "condition.keyword": name}
            search_url = 'http://gsxt.ynaic.gov.cn/notice/search/ent_info_list'
            html = s.post(search_url, data=data).content

            try:
                # First search result's detail link; remembered globally for run().
                detail_url = re.findall('<a href="(http://gsxt\.ynaic\.gov\.cn/notice/notice/view\?uuid=.+?)"', html)[0]
            except:
                # No result link in the response: treat as "company not found".
                return ''
            detail_html = s.get(detail_url).content
            return detail_html
        except Exception, ex:
            logger.exception(ex)
            if loop_num >= 20:
                logger.info('验证码尝试了20次，退出尝试')
                logger.error('保存word%s' % name)
                raise ValueError
                break  # unreachable: the raise above always fires first
            logger.info('验证码错误，正在识别,错误次数%s' % loop_num)
            continue


def run(detail_html, **args):
    """Parse a company detail page (plus its annual reports) into a result dict.

    Reads the module-level global ``detail_url`` (set by verify()/search3())
    to derive the annual-report listing URL (tab=01 -> tab=02).

    Keyword args:
        type: when 1, return a (html_source, alldata, companyUrl) tuple
              including the raw HTML record; otherwise return alldata only.
        searchkey: the keyword that produced this page (stored in html_source).

    NOTE(review): throughout this function, ``print <var>`` inside try/except
    is used as an existence check -- a NameError on an unset variable triggers
    the except branch that assigns the default.
    """
    # Each data section of the page is rendered as its own <table>.
    tables = re.findall('<table[\s\S]+?</table>', detail_html)
    for j in tables:
        # Section heading sits in a colspan'd <th> (e.g. basic info, shareholders).
        word = re.findall('<th colspan="\d+?">(.+?)</th>', j)
        if '股东信息' in j:  # shareholder information table
            shareHolderList = table.index('股东信息', j)
            if shareHolderList:
                for i in shareHolderList:
                    # Follow each shareholder's detail link for contribution data.
                    share_url = re.findall('href="(.+?)"', i['shareHolderdetail'])
                    if share_url:
                        html = curl(share_url[0])
                        html = html.replace(' ', '')
                        # Contribution amount and form are embedded as JS assignments.
                        subConam = re.findall('invt\.subConAm="(.*?)"', html)[0]
                        conDate = ""
                        fundedRatio = ""
                        regCapCur = re.findall('invt\.conForm="(.*?)"', html)[0]
                        country = ""
                        i['shareHolderdetail'] = share_url
                        i['subConam'] = subConam
                        i['conDate'] = conDate
                        i['fundedRatio'] = fundedRatio
                        i['regCapCur'] = regCapCur
                        i['country'] = country
                    else:
                        # No detail link: blank out every derived field.
                        i['shareHolderdetail'] = ''
                        i['subConam'] = ''
                        i['conDate'] = ''
                        i['fundedRatio'] = ''
                        i['regCapCur'] = ''
                        i['country'] = ''
        try:
            # word may be empty for tables without a heading; IndexError falls
            # through to the except below and skips the table.
            if '基本信息' == word[0]:  # basic info
                basicList = table.index(word[0].replace(' ', ''), j)
            # if '股东信息' == word[0]:
            #    shareHolderList = table.index('股东信息',j)
            if '主要人员信息' == word[0]:  # key personnel
                personList = table.index(word[0].replace(' ', ''), j)
            if '变更信息' == word[0]:  # change records
                alterList = table.index(word[0].replace(' ', ''), j)
            if '分支机构信息' == word[0]:  # branch offices
                filiationList = table.index(word[0].replace(' ', ''), j)
            if '清算信息' == word[0]:  # liquidation
                liquidationList = table.index(word[0].replace(' ', ''), j)
            if '经营异常' == word[0] or '经营异常信息' == word[0]:  # abnormal operation
                abnormalOperation = table.index(word[0].replace(' ', ''), j)
        except:
            # print word
            continue
    # Existence checks: default any section that was never assigned to [].
    try:
        print basicList
    except:
        basicList = []
    try:
        print shareHolderList
    except:
        shareHolderList = []
    try:
        print personList
    except:
        personList = []
    try:
        print alterList
    except:
        alterList = []
    try:
        print filiationList
    except:
        filiationList = []
    try:
        print liquidationList
    except:
        liquidationList = []
    try:
        print abnormalOperation
    except:
        abnormalOperation = []

    # Sections not provided by this source; always empty for province 'yn'.
    punishBreakList = []
    punishedList = []
    alidebtList = []
    entinvItemList = []
    frinvList = []
    frPositionList = []
    caseInfoList = []
    sharesFrostList = []
    sharesImpawnList = []
    morDetailList = []
    morguaInfoList = []
    # Annual-report listing lives at the same URL with tab=02 instead of tab=01.
    report_url = detail_url.replace('tab=01', 'tab=02')
    html = curl(report_url)
    # Pairs of (annual-report URL, year).
    report_url = re.findall('"(http://gsxt\.ynaic\.gov\.cn/notice/notice/view_annual.+?)" target="_blank">(\d+)', html)
    yearReportList = []
    yearList = []
    for i in report_url:
        url = i[0]
        year = i[1]
        html = curl(url)
        # print html
        table_list = re.findall('<table[\s\S]+?</table>', html)
        # NOTE(review): the defaulting try/print blocks below run inside this
        # per-table loop; if a year's page has no tables, report_* values from
        # the previous year carry over into this year's dict (and the first
        # such year would raise NameError) -- confirm intended.
        for j in table_list:
            if '企业基本信息' in j:  # enterprise basic info
                report_basic = table.report_basic(j)
            if '网站或网店信息' in j:  # website / online-shop info
                report_website = table.report_website(j)
            if '企业资产状况信息' in j:  # assets info
                report_assetsInfo = table.report_assetsInfo(j)
            if '股东及出资信息' in j:  # investor information
                report_investorInformations = table.report_investorInformations(j)
            if '股权变更信息' in j:  # equity change info
                report_equityChangeInformations = table.report_equityChangeInformations(j)
            if '修改记录' in j:  # modification records
                report_changeRecords = table.report_changeRecords(j)
            try:
                print report_basic
            except:
                report_basic = {}
            try:
                print report_website
            except:
                report_website = {}
            try:
                print report_assetsInfo
            except:
                report_assetsInfo = {}
            try:
                print report_investorInformations
            except:
                report_investorInformations = []
            try:
                print report_equityChangeInformations
            except:
                report_equityChangeInformations = []
            try:
                print report_changeRecords
            except:
                report_changeRecords = []

        # Keep both the raw HTML and the parsed per-year report.
        ditSource = {"year": year, "html": html}
        yearList.append(ditSource)
        dit1 = {"year": year, "baseInfo": report_basic, "website": report_website,
                "investorInformations": report_investorInformations, "assetsInfo": report_assetsInfo,
                "equityChangeInformations": report_equityChangeInformations, "changeRecords": report_changeRecords}
        yearReportList.append(dit1)
    alldata = {'province': 'yn', "abnormalOperation": abnormalOperation, "basicList": basicList,
               "shareHolderList": shareHolderList, "personList": personList, "punishBreakList": punishBreakList,
               "punishedList": punishedList, "alidebtList": alidebtList, "entinvItemList": entinvItemList,
               "frinvList": frinvList, "frPositionList": frPositionList, "alterList": alterList,
               "filiationList": filiationList, "caseInfoList": caseInfoList, "sharesFrostList": sharesFrostList,
               "sharesImpawnList": sharesImpawnList, "morDetailList": morDetailList, "morguaInfoList": morguaInfoList,
               "liquidationList": liquidationList, "yearReportList": yearReportList}
    if args.get('type') == 1:
        # print json.dumps(basicList,ensure_ascii=False,indent=4)
        # Raw-HTML record plus the company's URL record for persistence.
        html_source = {"province": "yn", "type": 0, "html": detail_html, "keyword": args.get('searchkey', "none"),
                       "companyName": basicList[0]['enterpriseName'], "yearList": yearList}
        companyUrl = {'province': "yn", "url": detail_url, "method": "get",
                      "companyName": basicList[0]['enterpriseName']}
        return (html_source, alldata, companyUrl)
    return alldata


def search(key):
    """Search for company *key*; return the parsed result dict, {} when not found."""
    detail_html = verify(key)
    if not detail_html:
        return {}
    return run(detail_html)


def search2(key):
    """Like search(), but ask run() for the (html_source, alldata, companyUrl) tuple."""
    detail_html = verify(key)
    if not detail_html:
        return {}
    return run(detail_html, type=1, searchword=key)


def search3(data):
    """Re-crawl a company from a saved record holding its detail URL and name.

    Expects ``data`` with 'url' and 'companyName' keys; raises on failure.
    """
    global detail_url
    detail_url = data.get('url')
    page = curl(detail_url)
    company = data.get("companyName", "")
    if not page or not company:
        raise Exception("error")
    return run(page, searchkey=company, type=1, companyUrl=detail_url)

if __name__ == "__main__":
    # Smoke test: re-crawl one known company via its saved detail URL.
    # data = {
    #     "province": "yn",
    #     "url": "http://gsxt.ynaic.gov.cn/notice/notice/view?uuid=FJnHSSp__qDY1nO7oOm683foSzZjNPqb&tab=01",
    #     "method": "get",
    #     "companyName": "云南自由互联科技有限公司"
    # }
    # print json.dumps(search2('云南自由互联科技有限公司'), ensure_ascii=False, indent=4)
    # print json.dumps(search3(data), ensure_ascii=False, indent=4)
    # # print json.dumps(search2('云南自由互联科技有限公司'),ensure_ascii=False,indent=4)
    # # 云南自由互联科技有限公司
    req_data = {"companyName" : "昆明建蓝大科饲料有限公司", "province" : "yn", "url" : "http://gsxt.ynaic.gov.cn/notice/notice/view?uuid=FJnHSSp__qAkjrk_sOHQP2cKQcObD_Tt&tab=01", "method" : "get" }
    print json.dumps(search3(req_data), ensure_ascii=False, indent=4)
